{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "3acc09fe-0466-43f8-9152-df7b19492401",
   "metadata": {},
   "source": [
    "# CSV2Neo4j "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "eb26892c-4a01-4d54-b1e3-fa07e416bb08",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "04ff0712-b3a7-43a4-9c67-2e4419a680ff",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "df = pd.read_csv('data/medical_data.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "6d24631e-fc33-405b-8d13-8008f86b7eea",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(8808, 23)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "dbefc985-eaae-4a35-b2d9-5974a19deb6f",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def print_list(my_list,top=20):\n",
    "    for i, item in enumerate(my_list[:top]):\n",
    "        print(f\"{i + 1}. {item}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "81d1555b-89d5-406e-a3fc-fcb613bf5081",
   "metadata": {},
   "source": [
    "## 实体（节点）"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0d704900-5dc8-4f37-973b-5ce8c46ad3c7",
   "metadata": {},
   "source": [
    "### 所有症状"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "140b8456-74ee-4d8a-b686-af6ab67b3c89",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def df2list(key):\n",
    "    data = []\n",
    "    for each in df[key]:\n",
    "        if not isinstance(each, str):  # 如果 each 不是字符串类型\n",
    "            each = str(each)           # 转换为字符串类型\n",
    "        data.extend(each.split(','))\n",
    "    data = set(data)\n",
    "    return data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "aaca38b8-7625-4eb9-a14d-8e7de75f55cf",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "symptoms = df2list('症状')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "57cb4b06-ee94-4a1e-9eef-7ecdc621f8bb",
   "metadata": {},
   "source": [
    "### 所有科室"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "56680f78-9fa4-428a-aa90-6e384533c511",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "departments= df2list('科室')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "477fea89-82b9-430f-9ddb-b8b7906447df",
   "metadata": {},
   "source": [
    "### 实体：所有检查"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "ff20249b-466f-4e52-9d95-8455cc8767e7",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "checks = df2list('检查')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "84c95098-ca1d-48cb-a3bd-9226d1e109c7",
   "metadata": {},
   "source": [
    "### 实体：所有药物"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "941b32db-c0f5-45cf-91c6-da922f3f764c",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "drugs = []\n",
    "for each in df['推荐药物']:\n",
    "    try:\n",
    "        drugs.extend(each.split(','))\n",
    "    except:\n",
    "        pass\n",
    "for each in df['常用药物']:\n",
    "    try:\n",
    "        drugs.extend(each.split(','))\n",
    "    except:\n",
    "        pass\n",
    "drugs = set(drugs)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5e541422-656e-4f8e-becb-d52328f111c5",
   "metadata": {},
   "source": [
    "### 实体：所有食物"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "e4fa0d2f-4b17-49ae-8022-bd4681cfb46b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "foods = []\n",
    "for each in df['可以吃']:\n",
    "    try:\n",
    "        foods.extend(each.split(','))\n",
    "    except:\n",
    "        pass\n",
    "for each in df['不可以吃']:\n",
    "    try:\n",
    "        foods.extend(each.split(','))\n",
    "    except:\n",
    "        pass\n",
    "for each in df['推荐吃']:\n",
    "    try:\n",
    "        foods.extend(each.split(','))\n",
    "    except:\n",
    "        pass\n",
    "foods = set(foods)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "685abcfc-3aa1-469d-af92-ddc582792372",
   "metadata": {},
   "source": [
    "### 实体：所有药物厂商"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "d79d2b15-0f4b-4b20-aa2b-bbb09e822f2c",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "producers = []\n",
    "\n",
    "for each in df['具体药物']:\n",
    "    try:\n",
    "        for each_drug in each.split(','):\n",
    "            producer = each_drug.split('(')[0]\n",
    "            producers.append(producer)\n",
    "    except:\n",
    "        pass\n",
    "producers = set(producers)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8448deaa-235d-4703-926f-3164a3a35a4b",
   "metadata": {},
   "source": [
    "### 疾病字典信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "5d64c82b-3afd-457a-b247-c46b2caf05fe",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "disease_infos = [] # 疾病信息\n",
    "for idx, row in df.iterrows():\n",
    "    disease_infos.append(dict(row))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "954179c8-1cbd-41d7-856a-4eb6a228d544",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['疾病名称', '疾病描述', '疾病种类', '科室', '病因', '症状', '检查', '并发症', '花费', '疗程', '疗法', '治愈率', '易感人群', '感染概率', '感染途径', '预防措施', '推荐药物', '常用药物', '具体药物', '可以吃', '不可以吃', '推荐吃', '是否纳入医保'])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dict(row).keys()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2d73a82f-452b-405b-9607-d1b97e16cb84",
   "metadata": {},
   "source": [
    "## 关系（边）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "215a365f-4f04-4172-9d02-d8baee8c2a9c",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def deduplicate(rels_old):\n",
    "    '''关系去重函数'''\n",
    "    rels_new = []\n",
    "    for each in rels_old:\n",
    "        if each not in rels_new:\n",
    "            rels_new.append(each)\n",
    "    return rels_new"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "835ebee3-e9a6-4187-9ce3-774e79972a6d",
   "metadata": {},
   "source": [
    "### 关系：疾病-检查"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "ea50c57f-3383-4cc9-965c-09e32d526f86",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "rels_check = []\n",
    "for idx, row in df.iterrows():\n",
    "    checks = row['检查']\n",
    "    if not isinstance(checks, str):  # 如果 checks 不是字符串类型\n",
    "        checks = str(checks)         # 转换为字符串类型\n",
    "    for each in checks.split(','):\n",
    "        rels_check.append([row['疾病名称'], each])\n",
    "rels_check = deduplicate(rels_check)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "9231524f-0d1c-42b4-9de1-38b7a3798544",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1. ['肺泡蛋白质沉积症', '胸部CT检查']\n",
      "2. ['肺泡蛋白质沉积症', '肺活检']\n",
      "3. ['肺泡蛋白质沉积症', '支气管镜检查']\n",
      "4. ['百日咳', '耳、鼻、咽拭子细菌培养']\n",
      "5. ['百日咳', '周围血白细胞计数及分类检验']\n",
      "6. ['百日咳', '血常规']\n",
      "7. ['百日咳', '酶联免疫吸附试验']\n",
      "8. ['百日咳', '白细胞分类计数']\n",
      "9. ['苯中毒', '血常规']\n",
      "10. ['苯中毒', '骨髓象分析']\n",
      "11. ['苯中毒', '先令氏指数']\n",
      "12. ['喘息样支气管炎', '肺部检查']\n",
      "13. ['喘息样支气管炎', '肺和胸膜听诊']\n",
      "14. ['喘息样支气管炎', '抗链球菌型M蛋白抗体']\n",
      "15. ['喘息样支气管炎', '抗链球菌壁多糖抗体']\n",
      "16. ['喘息样支气管炎', '酶联免疫吸附试验']\n",
      "17. ['成人呼吸窘迫综合征', '胸部CT检查']\n",
      "18. ['成人呼吸窘迫综合征', '呼吸肌功能测定']\n",
      "19. ['成人呼吸窘迫综合征', '血浆蛋白C抗原']\n",
      "20. ['成人呼吸窘迫综合征', '肺泡气-动脉血氧分压差']\n"
     ]
    }
   ],
   "source": [
    "print_list(rels_check)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ab2ba620-f745-47d6-9a25-db919d04fa5c",
   "metadata": {},
   "source": [
    "### 关系：疾病-症状"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "a44d5fcd-84a4-4438-8a6d-8610bd9d05bc",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "rels_symptom = []\n",
    "for idx, row in df.iterrows():\n",
    "    symptoms = row['症状']\n",
    "    if not isinstance(symptoms, str):  # 如果 symptoms 不是字符串类型\n",
    "        symptoms = str(symptoms)       # 转换为字符串类型\n",
    "    for each in symptoms.split(','):\n",
    "        rels_symptom.append([row['疾病名称'], each])\n",
    "rels_symptom = deduplicate(rels_symptom)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "b734e05d-0954-484b-a6bb-002043ddc943",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1. ['肺泡蛋白质沉积症', '紫绀']\n",
      "2. ['肺泡蛋白质沉积症', '胸痛']\n",
      "3. ['肺泡蛋白质沉积症', '呼吸困难']\n",
      "4. ['肺泡蛋白质沉积症', '乏力']\n",
      "5. ['肺泡蛋白质沉积症', '毓卓']\n",
      "6. ['百日咳', '吸气时有蝉鸣音']\n",
      "7. ['百日咳', '痉挛性咳嗽']\n",
      "8. ['百日咳', '胸闷']\n",
      "9. ['百日咳', '肺阴虚']\n",
      "10. ['百日咳', '抽搐']\n",
      "11. ['百日咳', '低热']\n",
      "12. ['百日咳', '闫鹏辉']\n",
      "13. ['百日咳', '惊厥']\n",
      "14. ['苯中毒', '恶心']\n",
      "15. ['苯中毒', '抽搐']\n",
      "16. ['苯中毒', '感觉障碍']\n",
      "17. ['喘息样支气管炎', '耸肩喘息']\n",
      "18. ['喘息样支气管炎', '哮鸣音']\n",
      "19. ['喘息样支气管炎', '纤毛上皮细胞损伤脱落']\n",
      "20. ['喘息样支气管炎', '变应性咳嗽']\n"
     ]
    }
   ],
   "source": [
    "print_list(rels_symptom)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c657a075-c2ec-4eab-a342-ed3f3885bd4d",
   "metadata": {},
   "source": [
    "### 关系：疾病-疾病（并发症）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "259077dc-85b6-4e82-8662-44289a9c866d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "rels_acompany = []\n",
    "for idx, row in df.iterrows():\n",
    "    acompany = row['并发症']\n",
    "    if not isinstance(acompany, str):  # 如果 acompany 不是字符串类型\n",
    "        acompany = str(acompany)       # 转换为字符串类型\n",
    "    for each in acompany.split(','):\n",
    "        rels_acompany.append([row['疾病名称'], each])\n",
    "rels_acompany = deduplicate(rels_acompany)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "a9156ef3-b22d-46cc-a924-7c6bcbe7b4ec",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1. ['肺泡蛋白质沉积症', '多重肺部感染']\n",
      "2. ['百日咳', '肺不张']\n",
      "3. ['苯中毒', '贫血']\n",
      "4. ['喘息样支气管炎', '支气管哮喘']\n",
      "5. ['成人呼吸窘迫综合征', '细菌性肺炎']\n",
      "6. ['大量羊水吸入', '呼吸衰竭']\n",
      "7. ['单纯性肺嗜酸粒细胞浸润症', '胆道蛔虫病']\n",
      "8. ['大叶性肺炎', '脓胸']\n",
      "9. ['大楼病综合征', '抑郁症']\n",
      "10. ['二硫化碳中毒', '昏迷']\n",
      "11. ['肺-胸膜阿米巴病', '阿米巴肝脓肿']\n",
      "12. ['肺出血－肾炎综合征', '便血']\n",
      "13. ['肺放线菌病', '膈下脓肿']\n",
      "14. ['肺泡蛋白沉着症', '呼吸衰竭']\n",
      "15. ['肺曲菌病', '过敏性鼻炎']\n",
      "16. ['放射性肺炎', '肺气肿']\n",
      "17. ['肺念珠菌病', '菌血症']\n",
      "18. ['肺大疱', '张力性气胸']\n",
      "19. ['肺炎球菌肺炎', '败血症']\n",
      "20. ['肺气肿', '呼吸衰竭']\n"
     ]
    }
   ],
   "source": [
    "print_list(rels_acompany)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9377f16f-7b87-44ea-a550-d1946492df27",
   "metadata": {},
   "source": [
    "### 关系：疾病-推荐药物"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "e2ad232c-d9e1-41cf-8043-d0b300261186",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "rels_recommanddrug = []\n",
    "for idx, row in df.iterrows():\n",
    "    try:\n",
    "        for each in row['推荐药物'].split(','):\n",
    "            rels_recommanddrug.append([row['疾病名称'], each])\n",
    "    except:\n",
    "        pass\n",
    "rels_recommanddrug = deduplicate(rels_recommanddrug)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "23f6d957-9bc1-487a-9470-5ea50bfd5932",
   "metadata": {},
   "source": [
    "### 关系：疾病-常用药物"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "e0b88509-fd18-4e1f-8090-f9f7fdd9d2bf",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "rels_commonddrug = []\n",
    "for idx, row in df.iterrows():\n",
    "    try:\n",
    "        for each in row['常用药物'].split(','):\n",
    "            rels_commonddrug.append([row['疾病名称'], each])\n",
    "    except:\n",
    "        pass\n",
    "rels_commonddrug = deduplicate(rels_commonddrug)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "69963493-56bc-436d-ba82-c4db374db7ed",
   "metadata": {},
   "source": [
    "### 关系：疾病-不可以吃"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "57a7dc2d-3ca2-47d4-a6b9-49b37efd15ab",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "rels_noteat = []\n",
    "for idx, row in df.iterrows():\n",
    "    try:\n",
    "        for each in row['不可以吃'].split(','):\n",
    "            rels_noteat.append([row['疾病名称'], each])\n",
    "    except:\n",
    "        pass\n",
    "rels_noteat = deduplicate(rels_noteat)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d5560d6c-3f17-4f36-9990-7f23a9b12f77",
   "metadata": {},
   "source": [
    "### 关系：疾病-可以吃"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "581db30e-ad0f-4bb4-a8ac-cbf48e8b2b0b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "rels_doeat = []\n",
    "for idx, row in df.iterrows():\n",
    "    try:\n",
    "        for each in row['可以吃'].split(','):\n",
    "            rels_doeat.append([row['疾病名称'], each])\n",
    "    except:\n",
    "        pass\n",
    "rels_doeat = deduplicate(rels_doeat)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e7ee7548-6caf-4878-9aae-1fcef0f94006",
   "metadata": {},
   "source": [
    "### 关系：疾病-推荐吃"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "dbf58e4b-5601-4f22-9e86-ef91123b2eef",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "rels_recommandeat = []\n",
    "for idx, row in df.iterrows():\n",
    "    try:\n",
    "        for each in row['推荐吃'].split(','):\n",
    "            rels_recommandeat.append([row['疾病名称'], each])\n",
    "    except:\n",
    "        pass\n",
    "rels_recommandeat = deduplicate(rels_recommandeat)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2449ac40-910f-4c4c-9614-34457dbf3e64",
   "metadata": {},
   "source": [
    "### 关系：药物厂商-具体药物"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "0632c7ef-6f46-438a-8f18-dfe79d626029",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "\n",
    "rels_drug_producer = []\n",
    "for each in df['具体药物']:\n",
    "    try:\n",
    "        for each_drug in each.split(','):\n",
    "            producer = each_drug.split('(')[0]\n",
    "            drug = each_drug.split('(')[1][:-1]\n",
    "            rels_drug_producer.append([producer, drug])\n",
    "    except:\n",
    "        pass\n",
    "rels_drug_producer = deduplicate(rels_drug_producer)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "92c300a6-ca9b-4f72-947b-09de2156d550",
   "metadata": {},
   "source": [
    "### 关系：疾病-科室、小科室-大科室"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "c9f81778-05c3-4a43-b392-4129678c7ee4",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "rels_category = []  # 关系：疾病-科室\n",
    "rels_department = []  # 关系：小科室-大科室\n",
    "for idx, row in df.iterrows():\n",
    "    department = row['科室']\n",
    "    if not isinstance(department, str):  # 如果 department 不是字符串类型\n",
    "        department = str(department)     # 转换为字符串类型\n",
    "    if len(department.split(',')) == 1:\n",
    "        rels_category.append([row['疾病名称'], department])\n",
    "    else:\n",
    "        big = department.split(',')[0]  # 大科室\n",
    "        small = department.split(',')[1]  # 小科室\n",
    "        rels_category.append([row['疾病名称'], small])\n",
    "        rels_department.append([small, big])\n",
    "rels_category = deduplicate(rels_category)\n",
    "rels_department = deduplicate(rels_department)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "8e40e1b4-8d92-4a3d-a68c-5aa460c0873e",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1. ['肺泡蛋白质沉积症', '呼吸内科']\n",
      "2. ['百日咳', '小儿内科']\n",
      "3. ['苯中毒', '急诊科']\n",
      "4. ['喘息样支气管炎', '呼吸内科']\n",
      "5. ['成人呼吸窘迫综合征', '呼吸内科']\n",
      "6. ['大量羊水吸入', '小儿内科']\n",
      "7. ['单纯性肺嗜酸粒细胞浸润症', '呼吸内科']\n",
      "8. ['大叶性肺炎', '呼吸内科']\n",
      "9. ['大楼病综合征', '其他综合']\n",
      "10. ['二硫化碳中毒', '急诊科']\n",
      "11. ['肺-胸膜阿米巴病', '呼吸内科']\n",
      "12. ['肺出血－肾炎综合征', '呼吸内科']\n",
      "13. ['肺放线菌病', '呼吸内科']\n",
      "14. ['肺泡蛋白沉着症', '呼吸内科']\n",
      "15. ['肺曲菌病', '呼吸内科']\n",
      "16. ['放射性肺炎', '呼吸内科']\n",
      "17. ['肺念珠菌病', '呼吸内科']\n",
      "18. ['肺大疱', '呼吸内科']\n",
      "19. ['肺炎球菌肺炎', '呼吸内科']\n",
      "20. ['肺气肿', '呼吸内科']\n"
     ]
    }
   ],
   "source": [
    "print_list(rels_category)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "c62ebecd-ef7f-4242-95e1-d44e6d8129d6",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1. ['呼吸内科', '内科']\n",
      "2. ['小儿内科', '儿科']\n",
      "3. ['其他综合', '其他科室']\n",
      "4. ['肿瘤内科', '肿瘤科']\n",
      "5. ['心胸外科', '外科']\n",
      "6. ['感染科', '外科']\n",
      "7. ['儿科综合', '儿科']\n",
      "8. ['产科', '妇产科']\n",
      "9. ['普外科', '外科']\n",
      "10. ['心内科', '内科']\n",
      "11. ['肿瘤外科', '肿瘤科']\n",
      "12. ['神经内科', '内科']\n",
      "13. ['风湿免疫科', '内科']\n",
      "14. ['眼科', '五官科']\n",
      "15. ['内分泌科', '内科']\n",
      "16. ['小儿外科', '儿科']\n",
      "17. ['耳鼻喉科', '五官科']\n",
      "18. ['妇科', '妇产科']\n",
      "19. ['康复科', '其他科室']\n",
      "20. ['消化内科', '内科']\n"
     ]
    }
   ],
   "source": [
    "print_list(rels_department)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2d7b867e-fdfa-4717-b85b-a1d1b1b4f12a",
   "metadata": {},
   "source": [
    "## 链接图数据库"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "e54f42f5-27f3-48cc-ada5-67854591a220",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from neo4j_driver import Neo4jConnection, Node"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "ff243da9-2d79-43aa-96bb-a5428cfb50e4",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "g = Neo4jConnection('neo4j://localhost:7687/', 'neo4j', 'Lorne@2022')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ad9f8705-cbb8-42be-a688-a368099f9a98",
   "metadata": {},
   "source": [
    "### 创建疾病实体"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "3f2ff673-d970-43ec-8bd5-1e7f35830739",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "共创建 8808 个疾病实体\n"
     ]
    }
   ],
   "source": [
    "count = 0\n",
    "for disease_dict in disease_infos:\n",
    "    try:\n",
    "        node = Node(\"Disease\",\n",
    "                    name=disease_dict['疾病名称'],\n",
    "                    desc=disease_dict['疾病描述'],\n",
    "                    prevent=disease_dict['预防措施'],\n",
    "                    cause=disease_dict['病因'],\n",
    "                    easy_get=disease_dict['易感人群'],\n",
    "                    cure_lasttime=disease_dict['疗程'],\n",
    "                    cure_department=disease_dict['科室'],\n",
    "                    cure_way=disease_dict['疗法'], \n",
    "                    cured_prob=disease_dict['治愈率'])\n",
    "        g.create(node)\n",
    "        count += 1\n",
    "        # print('创建疾病实体：', disease_dict['疾病名称'])\n",
    "    except:\n",
    "        pass\n",
    "print('共创建 {} 个疾病实体'.format(count))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5900cc0d-de2d-4a3c-9371-a7825a349538",
   "metadata": {},
   "source": [
    "### 创建药物实体"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "2ab2965b-293a-41ab-bf63-482ffd0904ab",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "共创建 3829 个药物实体\n"
     ]
    }
   ],
   "source": [
    "count = 0\n",
    "for each in drugs:\n",
    "    node = Node('Drug', name=each)\n",
    "    g.create(node)\n",
    "    count += 1\n",
    "    # print('创建实体 {}'.format(each))\n",
    "print('共创建 {} 个药物实体'.format(count))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c8a41d3b-efcd-40ca-8ba6-cbd8c91517b8",
   "metadata": {},
   "source": [
    "### 创建食物实体"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "6c1de144-97f6-4c24-a363-9ad6e613c54a",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "共创建 4870 个食物实体\n"
     ]
    }
   ],
   "source": [
    "count = 0\n",
    "for each in foods:\n",
    "    node = Node('Food', name=each)\n",
    "    g.create(node)\n",
    "    count += 1\n",
    "    # print('创建实体 {}'.format(each))\n",
    "print('共创建 {} 个食物实体'.format(count))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4b433f37-24a2-4fe9-ab3d-e754a45b839d",
   "metadata": {},
   "source": [
    "### 创建检查实体"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "4ff559ab-e898-4fb1-ac83-6877503346ca",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "共创建 57 个检查实体\n"
     ]
    }
   ],
   "source": [
    "count = 0\n",
    "for each in checks:\n",
    "    node = Node('Check', name=each)\n",
    "    g.create(node)\n",
    "    count += 1\n",
    "    # print('创建实体 {}'.format(each))\n",
    "print('共创建 {} 个检查实体'.format(count))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ddab10ef-c76d-468d-8166-c0059e4927d4",
   "metadata": {},
   "source": [
    "### 创建科室实体"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "d80b21fa-d5dd-474e-be96-7e05965e08de",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "共创建 55 个科室实体\n"
     ]
    }
   ],
   "source": [
    "count = 0\n",
    "for each in departments:\n",
    "    node = Node('Department', name=each)\n",
    "    g.create(node)\n",
    "    count += 1\n",
    "    # print('创建实体 {}'.format(each))\n",
    "print('共创建 {} 个科室实体'.format(count))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8416df0e-34b7-415a-b834-ff1556856010",
   "metadata": {},
   "source": [
    "### 创建 药物厂商 实体"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "100d0769-c819-4773-b495-b5001d208de0",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "共创建 17202 个厂商实体\n"
     ]
    }
   ],
   "source": [
    "count = 0\n",
    "for each in producers:\n",
    "    node = Node('Producer', name=each)\n",
    "    g.create(node)\n",
    "    count += 1\n",
    "    # print('创建实体 {}'.format(each))\n",
    "print('共创建 {} 个厂商实体'.format(count))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "712d6ab2-972e-470c-9779-eeedb0e0471c",
   "metadata": {},
   "source": [
    "### 创建 症状 实体"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "fdc7e6bd-7ede-4c68-8c56-ff109c953777",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "共创建 23 个症状实体\n"
     ]
    }
   ],
   "source": [
    "count = 0\n",
    "for each in symptoms:\n",
    "    node = Node('Symptom', name=each)\n",
    "    g.create(node)\n",
    "    count += 1\n",
    "    # print('创建实体 {}'.format(each))\n",
    "print('共创建 {} 个症状实体'.format(count))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e2c57bd1-7a6f-4197-b932-6c9be991b918",
   "metadata": {},
   "source": [
    "## 创建知识图谱关系（连接、边）"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "28e6fc3f-b359-412a-96ed-f4854162433a",
   "metadata": {},
   "source": [
    "友情提示：这一步需要花费很长的时间，不过不同的电脑配置需要的时间也会有所不同，差不多需要执行40多分钟的时间"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "dce00254-aa91-4558-814f-c30cf5e3c0d1",
   "metadata": {},
   "outputs": [],
   "source": [
    "g.relationship('Disease', 'Food', rels_recommandeat, 'recommand_eat', '推荐食谱')\n",
    "g.relationship('Disease', 'Food', rels_noteat, 'no_eat', '忌吃')\n",
    "g.relationship('Disease', 'Food', rels_doeat, 'do_eat', '宜吃')\n",
    "g.relationship('Department', 'Department', rels_department, 'belongs_to', '属于')\n",
    "g.relationship('Disease', 'Drug', rels_commonddrug, 'common_drug', '常用药品')\n",
    "g.relationship('Producer', 'Drug', rels_drug_producer, 'drugs_of', '生产药品')\n",
    "g.relationship('Disease', 'Drug', rels_recommanddrug, 'recommand_drug', '好评药品')\n",
    "g.relationship('Disease', 'Check', rels_check, 'need_check', '诊断检查')\n",
    "g.relationship('Disease', 'Symptom', rels_symptom, 'has_symptom', '症状')\n",
    "g.relationship('Disease', 'Disease', rels_acompany, 'acompany_with', '并发症')\n",
    "g.relationship('Disease', 'Department', rels_category, 'belongs_to', '所属科室')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "ed5a4afe-d993-4d5d-adda-ff95cf3805a2",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "数据库中的节点总数： 34844\n"
     ]
    }
   ],
   "source": [
    "print(\"数据库中的节点总数：\", g.counts())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
